
* ---------------------------------------------------------------------------
* Session setup and sample restrictions for the May-matching extract.
* ---------------------------------------------------------------------------
clear
set more off

* Optional upstream step (left disabled, as in the original script):
*do "${programs}/ipums_to_dta.do"

* Load the extract and show the distribution of survey years in the log.
use "${raw}/May_matching/cps_00058.dta",clear
tab year

* Exclude the 1976 and 1983 survey years.
drop if inlist(year, 1976, 1983)

* Exclude records whose household id is below 10,000,000
* (missing hrhhid compares as larger than any number, so it is kept).
drop if hrhhid < 10000000

// sample 865,797 (observation count noted by the original author at this point)

* Build grdatn (0-18) from the higrade codes.
*   higrade == 10            -> grdatn = 0
*   higrade in {10g+21, 10g+30} -> grdatn = g, for g = 1, ..., 18
*     (e.g. 31/40 -> 1, 141/150 -> 12, 201/210 -> 18)
* Any other higrade value leaves grdatn missing.
* NOTE(review): presumably these are the IPUMS-CPS HIGRADE codes, where each
* pair is "attended / completed" the same grade -- confirm against the codebook.
gen grdatn = 0 if higrade == 10
forvalues grade = 1/18 {
    local code_lo = 10*`grade' + 21
    local code_hi = 10*`grade' + 30
    replace grdatn = `grade' if inlist(higrade, `code_lo', `code_hi')
}

* Flag observations that are not unique on the matching keys.
* dups = number of OTHER observations sharing the same key values (0 = unique).
* duplicates treats missing values like any other value when comparing keys.
duplicates tag mish hrhhid year statecensus age race sex  ind occ relate grdatn,g(dups)

count if dups>0 // 4033 remaining duplicates
tab age if dups>0 // 92% are under 18

// Match arbitrarily to observably similar observations
* matchvar numbers the observations inside each key cell (1, 2, ...), so that
* the keys plus matchvar identify an observation.
*
* The -missing- option is required here: without it, egen group() sets id to
* missing for every observation with a missing value in ANY key variable
* (grdatn is missing whenever higrade is not one of the recoded values), and
* -bys id- would then pool all of those observations into one by-group and
* number them 1..N across unrelated households. With -missing-, missing values
* form ordinary groups, consistent with -duplicates tag- above.
* The order within a cell is arbitrary (ties in the sort are not broken).
egen id = group(hrhhid year statecensus mish age race sex ind occ relate grdatn), missing
bys id: g matchvar = _n
drop id

* Sanity check: an observation that is unique on the keys must be the first
* (and only) member of its cell.
assert matchvar==1 if dups==0

save "${wd}/May_matching/ipums_match_data.dta",replace




